Load packages
library(tidyverse)
library(sf)
library(osmdata)
library(ggmap)
library(httr)
library(jsonlite)
# unload packages
# detach("package:microbenchmark", unload=TRUE)
Resources
Vignette: https://cran.r-project.org/web/packages/osmdata/vignettes/osmdata.html
Map features: https://wiki.openstreetmap.org/wiki/Map_Features
?available_features() and ?available_tags([feature])
List of parameters:
Balt_bbox <- getbb("Baltimore") %>%
opq()
Query the bus stops in Baltimore.
Balt_busStops <- Balt_bbox %>%
add_osm_feature(key = "highway", value = "bus_stop") %>%
osmdata_sf() %>%
# keep only the points. Note that the query returned 4333 points, 9 polygons, and 1 multi-line feature
.$osm_points
What’s in the bus stops data?
glimpse(Balt_busStops)
## Rows: 4,333
## Columns: 31
## $ osm_id <chr> "37751877", "393491959", "414569919",...
## $ name <chr> "Philadelphia Road & Fontana Lane Opp...
## $ CCC.CORNER <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ amenity <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ bench <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ bin <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ brand <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ brand.wikidata <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ bus <chr> "yes", "yes", NA, "yes", NA, "yes", N...
## $ commuter_bus <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ covered <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ highway <chr> "bus_stop", "bus_stop", "bus_stop", "...
## $ intercity_bus <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ lit <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ local_ref <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ name.zh <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ network <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ note <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ operator <chr> "MDOT Maryland Transit Administration...
## $ passenger_information_display <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ public_transport <chr> "stop_position", "stop_position", NA,...
## $ ref <chr> "6632", "437", NA, NA, NA, NA, NA, NA...
## $ route_ref <chr> "36;56", "104", NA, NA, NA, NA, NA, N...
## $ shelter <chr> "yes", "no", NA, NA, NA, NA, NA, NA, ...
## $ shelter_type <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ source <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ surface <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ tactile_paving <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ website <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ wheelchair <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ geometry <POINT [°]> POINT (-76.488 39.33856), POINT...
Make a quick map
Balt_map <- get_map(getbb("Baltimore"), maptype = "toner-background")
ggmap(Balt_map) +
geom_sf(data = Balt_busStops,
inherit.aes = FALSE, # this is necessary
alpha = 0.5) +
labs(title = "Bus Stops in Baltimore",
caption = "Data source: OSM",
x = "",
y = "")
base_url <- "https://api.ohsome.org/v0.9"
api_metadata <- GET(url= paste(base_url, "/metadata", sep = "")) %>%
content("text") %>%
fromJSON()
The below tells us the database contains data from October 8, 2007 to May 23, 2020.
api_metadata$extractRegion$temporalExtent
## $fromTimestamp
## [1] "2007-10-08T00:00:00Z"
##
## $toTimestamp
## [1] "2020-05-23T03:00:00Z"
Use this endpoint to aggregate OSM data, e.g. counts, areas, lengths, and users (contributors).
Let’s look at the trends for mapping bus stops in Baltimore.
# api_bbox <- getbb("Baltimore") %>% bbox_to_string() # note that the order of the coords is flipped from what the database needs
balt_bbox <- "-76.770759, 39.1308816,-76.450759,39.4508816"
# api_bbox <- "8.6581,49.3836,8.7225,49.4363"
api_keys <- "highway"
api_values <- "bus_stop"
monthly <- "2007-11-01/2020-05-23/P1M"
api_data <- GET(url = paste(base_url, "/elements/count", sep = ""),
query = list(
bboxes = balt_bbox,
keys = api_keys,
values = api_values,
time = monthly))
busStops_hist <- content(api_data, as = "text") %>%
fromJSON() %>%
.$result
The query from osmdata showed 4333 bus stops in Baltimore currently. The OSHDB query shows 4241 bus stops as of May 1, 2020.
ggplot(busStops_hist,
aes(x = as.Date(timestamp),
y = value)) +
geom_line() +
theme_bw() +
labs(title = "Bus Stops in Baltimore Mapped In OSM Over Time",
caption = "Source: OSHDB, ohsome API",
x = "Date",
y = "Count")
Use this endpoint to pull historical snapshots of OSM features.
Code below is not the most elegant and the resulting dataframe seems odd - is there a better way to convert the API geoJSON response into sf?
# is there a more elegant way to do the below?
extraction_api_data <- GET(url= paste(base_url, "/elements/geometry", sep = ""),
query = list(
bboxes = balt_bbox,
keys = api_keys,
values = api_values,
types = "point", # do I want to limit it to points only?
time = monthly))
busStops_geom_hist <- read_sf(extraction_api_data)
busStops_geom_hist$X.snapshotTimestamp <- as.Date(busStops_geom_hist$X.snapshotTimestamp)
busStops_geom_hist
## Simple feature collection with 84352 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -76.77029 ymin: 39.13266 xmax: -76.45081 ymax: 39.44865
## geographic CRS: WGS 84
## # A tibble: 84,352 x 4
## X.osmId X.snapshotTimestamp highway geometry
## <chr> <date> <chr> <POINT [°]>
## 1 node/1806310072 2013-04-01 bus_stop (-76.46212 39.3776)
## 2 node/1806310072 2013-05-01 bus_stop (-76.46212 39.3776)
## 3 node/1806310072 2013-06-01 bus_stop (-76.46212 39.3776)
## 4 node/1806310072 2013-07-01 bus_stop (-76.46212 39.3776)
## 5 node/1806310072 2013-08-01 bus_stop (-76.46212 39.3776)
## 6 node/1806310072 2013-09-01 bus_stop (-76.46212 39.3776)
## 7 node/1806310072 2013-10-01 bus_stop (-76.46212 39.3776)
## 8 node/1806310072 2013-11-01 bus_stop (-76.46212 39.3776)
## 9 node/1806310072 2013-12-01 bus_stop (-76.46212 39.3776)
## 10 node/1806310072 2014-01-01 bus_stop (-76.46212 39.3776)
## # ... with 84,342 more rows
# busStops_geom_hist <- content(extraction_api_data, as = "text") %>%
# fromJSON()
# write(busStops_geom_hist %>% toJSON(), "test.json")
According to our aggregated data, there was a massive spike in bus stops in Baltimore on OSM at the beginning of February, 2019: from 280 to 3968.
First, does the geometry data have the same number of observations as the aggregated data? It’s very close - it may be a matter of the time of day queried.
busStops_changeMap <- busStops_geom_hist %>%
filter(X.snapshotTimestamp %in% as.Date(c("2019-02-01", "2019-01-01")))
busStops_changeMap %>% group_by(X.snapshotTimestamp) %>%
summarize(count = n())
## Simple feature collection with 2 features and 2 fields
## geometry type: MULTIPOINT
## dimension: XY
## bbox: xmin: -76.77029 ymin: 39.13266 xmax: -76.45081 ymax: 39.44865
## geographic CRS: WGS 84
## # A tibble: 2 x 3
## X.snapshotTimesta~ count geometry
## <date> <int> <MULTIPOINT [°]>
## 1 2019-01-01 272 ((-76.7639 39.18362), (-76.76289 39.31459), (-76.761~
## 2 2019-02-01 3960 ((-76.77029 39.41797), (-76.77025 39.41795), (-76.77~
Next, compare to two months side-by-side on a map.
ggmap(Balt_map) +
geom_sf(data = busStops_changeMap,
inherit.aes = FALSE,
alpha = 0.5) +
labs(title = "Change in Bus Stops Mapped on OSM in Baltimore",
subtitle = "Number of bus stops jumped from 280 in January 2019 to 3968 in February 2019.",
caption = "Data source: OSHDB",
x = "",
y = "") +
facet_wrap(~ X.snapshotTimestamp)